ohibc logo
OHI British Columbia | OHI Science | Citation policy

knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/',
                      echo = TRUE, message = FALSE, warning = FALSE)

library(raster)
library(data.table)

source('https://raw.githubusercontent.com/oharac/src/master/R/common.R')
  ### includes library(tidyverse); library(stringr); 
  ### dir_M points to ohi directory on Mazu; dir_O points to home dir on Mazu

dir_git <- '~/github/spp_risk_dists'

### goal specific folders and info
dir_setup <- file.path(dir_git, 'data_setup')
dir_data  <- file.path(dir_git, 'data')
dir_o_anx <- file.path(dir_O, 'git-annex/spp_risk_dists')

source(file.path(dir_git, 'data_setup/api_fxns.R'))

1 Summary

Compare MPAs to biodiversity intactness:

2 Methods

2.1 set up taxonomic groups

shp_to_taxa <- read_csv(file.path(dir_git, 'data_setup/raw', 'shps_to_taxa.csv'))

taxa_sums <- data.frame(unweighted = list.files(file.path(dir_o_anx, 'taxa_summaries'),
                                                pattern = sprintf('cell_sum_%s.csv', api_version),
                                                full.names = TRUE),
                        rr_weighted = list.files(file.path(dir_o_anx, 'taxa_summaries'),
                                                pattern = sprintf('cell_sum_rrweight_%s.csv', api_version),
                                                full.names = TRUE)) %>%
  mutate(shp_basename = str_replace_all(basename(unweighted), '_cell_sum.+|_part.+', '')) %>%
  left_join(shp_to_taxa, by = 'shp_basename')


# spp_taxonomy <- read_csv(file.path(dir_o_anx, sprintf('iucn/spp_info_from_api_%s.csv', api_version))) %>%
#   select(iucn_sid, sciname, kingdom, phylum, class)
# 
# spp_info <- read_csv(file.path(dir_git, 'data', sprintf('spp_marine_maps_%s.csv', api_version)),
#                      col_types = 'ddciccc') %>% 
#   select(iucn_sid, sciname, dbf_file) %>%
#   left_join(spp_taxonomy %>% select(-sciname), by = 'iucn_sid')

  
# phyla_summaries <- spp_info %>%
#   mutate(phylum = tolower(phylum), class = tolower(class)) %>%
#   group_by(phylum) %>%
#   mutate(n_phylum = n()) %>%
#   group_by(phylum, class, n_phylum) %>%
#   summarize(n_class = n()) %>%
#   ungroup() %>%
#   mutate(taxa_gp = ifelse(phylum == 'chordata', 
#                           paste0('Chordata: ', tools::toTitleCase(class)), 
#                           tools::toTitleCase(phylum)),
#          n_gp = ifelse(phylum == 'chordata', n_class, n_phylum)) %>%
#   select(taxa_gp, n_gp) %>%
#   distinct()


# spp_info_all_marine <- read_csv(file.path(dir_git, 'data', 
#                                           sprintf('spp_marine_from_api_%s.csv', api_version))) %>%
#   left_join(spp_taxonomy, by = 'iucn_sid')
# 
# spp_info_all_marine$class %>% unique()
#  [1] "ACTINOPTERYGII"     "REPTILIA"           "MAMMALIA"           "CHONDRICHTHYES"    
#  [5] "GASTROPODA"         "MAXILLOPODA"        "ENOPLA"             "CEPHALASPIDOMORPHI"
#  [9] "SARCOPTERYGII"      "MEROSTOMATA"        "AMPHIBIA"           "INSECTA"           
# [13] "MAGNOLIOPSIDA"      "PINOPSIDA"          "LILIOPSIDA"         "CYCADOPSIDA"       
# [17] "FLORIDEOPHYCEAE"    "ANTHOZOA"           "ULVOPHYCEAE"        "PHAEOPHYCEAE"      
# [21] "HYDROZOA"           "MALACOSTRACA"       "MYXINI"             "BIVALVIA"          
# [25] "CEPHALOPODA"        "POLYPODIOPSIDA"     "HOLOTHUROIDEA"      "AVES"              
# [29] "LYCOPODIOPSIDA"     "CLITELLATA" 

2.1.1 histograms of risk and threatened spp by taxa

spp_info <- read_csv(file.path(dir_git, 'data', sprintf('spp_marine_maps_%s.csv', api_version)),
                     col_types = 'ddciccc') %>%
  mutate(shp_basename = str_replace_all(basename(tolower(dbf_file)), '.dbf|_part.+', '')) %>%
  select(iucn_sid, sciname, shp_basename) %>%
  left_join(taxa_sums, by = 'shp_basename')

taxa_gps <- spp_info$taxon %>% unique()

reload <- FALSE

for(taxon_gp in taxa_gps) { ### taxon_gp <- taxa_gps[1]
  
  taxon_txt <- taxon_gp %>% tolower() %>% str_replace_all('chordata: |[^a-z]+', '')
  dens_plot_file <- file.path(dir_git, 'ms_figures/taxa_figs',
                             sprintf('fig_SI_density_risk_%s.png',
                                     taxon_txt))
  threat_plot_file <- file.path(dir_git, 'ms_figures/taxa_figs',
                             sprintf('fig_SI_pct_threat_%s.png',
                                     taxon_txt))
  
  if(any(!file.exists(c(dens_plot_file, threat_plot_file))) | reload == TRUE) {

    taxa_info <- spp_info %>%
      filter(taxon == taxon_gp)
    
    n_spp_in_taxa <- unique(taxa_info$iucn_sid) %>% length()
        
    cat(paste0('\n\n#### ', taxon_gp, ' (', n_spp_in_taxa, ' species)\n\n'))
    message('Processing ', taxon_gp)
    
    unweighted_files <- taxa_info$unweighted %>% unique()
    rr_weighted_files <- taxa_info$rr_weighted %>% unique()
    
    taxon_df <- parallel::mclapply(unweighted_files, mc.cores = 12,
        FUN = function (x) { ### x <- unweighted_files[1]
          read_csv(x, col_types = 'dddiidi')
        }) %>% 
      bind_rows() %>%
      group_by(cell_id) %>%
      summarize(mean_risk_sum = 1/sum(n_spp_risk) * sum(mean_risk * n_spp_risk),
                pct_threat_sum = sum(n_spp_threatened) / sum(n_spp_risk),
                n_spp_sum = sum(n_spp_risk)) %>%
      mutate(wt = 'unweighted')
    
    taxon_df_rr <- parallel::mclapply(rr_weighted_files, mc.cores = 12,
        FUN = function (x) { ### x <- rr_weighted_files[1]
          read_csv(x, col_types = 'ddd___d___')
        }) %>% 
      bind_rows() %>%
      group_by(cell_id) %>%
      summarize(mean_risk_sum = 1/sum(sr_rr_risk) * sum(mean_risk * sr_rr_risk),
                pct_threat_sum = sum(sr_rr_threatened) / sum(sr_rr_risk)) %>%
      select(cell_id, mean_risk_sum, pct_threat_sum) %>%
      mutate(wt = 'rr_weighted')
  
    df <- taxon_df %>%
      bind_rows(taxon_df_rr)
    
    x <- ggplot(df, aes(x = mean_risk_sum, fill = wt, color = wt, ..scaled..)) +
      ggtheme_plot() +
      geom_density(alpha = .3, size = .25, bw = .015) +
      theme(axis.text.y  = element_blank(),
            axis.title.y = element_blank()) +
      xlim(0, 1) +
      labs(title = paste0(taxon_gp, ': risk'))
    
    
    ggsave(filename = dens_plot_file, 
           width = 6, height = 4, units = 'in', dpi = 300)

    
    y <- ggplot(df, aes(x = pct_threat_sum, fill = wt, color = wt, ..scaled..)) +
      ggtheme_plot() +
      geom_density(alpha = .3, size = .25, bw = .015) +
      theme(axis.text.y  = element_blank(),
            axis.title.y = element_blank()) +
      xlim(0, 1) +
      labs(title = paste0(taxon_gp, ': pct threatened'))
    
    ggsave(filename = threat_plot_file,
           width = 6, height = 4, units = 'in', dpi = 300)
  }
  
  cat(sprintf('![](%s)\n\n', path.expand(dens_plot_file)))
  cat(sprintf('![](%s)\n\n', path.expand(threat_plot_file)))
}
reload <- FALSE

rast_base <- raster(ext = extent(c(-180, 180, -90, 90)), res = 0.1)
values(rast_base) <- 1:length(rast_base)

land_poly <- sf::read_sf(file.path(dir_git, 'spatial/ne_10m_land/ne_10m_land.shp'))

taxa_gps <- spp_info$taxon %>% unique()

for(taxon_gp in taxa_gps) { ### taxon_gp <- taxa_gps[1]
    
  taxon_txt <- taxon_gp %>% tolower() %>% str_replace_all('chordata: |[^a-z]+', '')
  map_plot_file <- file.path(dir_git, 'ms_figures/taxa_maps',
                             sprintf('fig_SI_map_risk_%s.png',
                                     taxon_txt))
  rr_map_plot_file <- file.path(dir_git, 'ms_figures/taxa_maps',
                             sprintf('fig_SI_map_rr_risk_%s.png',
                                     taxon_txt))
  
  ### Check whether plots already exist.
  if(any(!file.exists(c(map_plot_file, rr_map_plot_file))) | reload == TRUE) {
      
    taxa_info <- spp_info %>%
      filter(taxon == taxon_gp)
    
    n_spp_in_taxa <- unique(taxa_info$iucn_sid) %>% length()
        
    cat(paste0('\n\n#### ', taxon_gp, ' (', n_spp_in_taxa, ' species)\n\n'))
    message('Processing ', taxon_gp)
    
    unweighted_files  <- taxa_info$unweighted %>% unique()
    rr_weighted_files <- taxa_info$rr_weighted %>% unique()
    
    ### process data for unweighted maps
    taxon_df <- parallel::mclapply(unweighted_files, mc.cores = 12,
        FUN = function (x) { ### x <- unweighted_files[1]
          read_csv(x, col_types = 'dddiidi')
        }) %>% 
      bind_rows() %>%
      group_by(cell_id) %>%
      summarize(mean_risk_sum = 1/sum(n_spp_risk) * sum(mean_risk * n_spp_risk),
                pct_threat_sum = sum(n_spp_threatened) / sum(n_spp_risk),
                n_spp_sum = sum(n_spp_risk)) %>%
      mutate(wt = 'unweighted')
    
    ### create raster of unweighted map, then rasterToPoints it.  Could also
    ### do a cell ID to lat/long, but whatevs.
    mean_rast <- subs(rast_base, taxon_df, by = 'cell_id', which = 'mean_risk_sum')

    mean_df <- mean_rast %>% 
      # aggregate(fact = 4) %>%
      rasterToPoints() %>% 
      as.data.frame() %>%
      setNames(c('long', 'lat', 'value'))
    
    
    message('ggplotting map for ', taxon_gp)
    
    x <- ggplot(mean_df) +
      geom_raster(aes(long, lat, fill = value)) +
      geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
      ggtheme_map() +
      theme(panel.background = element_rect(fill = 'grey85')) +
      coord_sf(datum = NA) +
      scale_fill_gradientn(colors = c('green4', 'lightyellow', 'red2', 'red3', 'red4', 'purple4'),
                           limits = c(0, 1),
                           labels = c('LC', 'NT', 'VU', 'EN', 'CR', 'EX'),
                           breaks = c( 0.0,  0.2,  0.4,  0.6,  0.8,  1.0)) +
      scale_x_continuous(expand = c(0, 0)) +
      scale_y_continuous(expand = c(0, 0)) +
      labs(title = paste0('Mean risk: ', taxon_gp),
           fill  = 'Mean risk')
    
    ggsave(filename = map_plot_file, width = 6, height = 4, units = 'in', dpi = 300)
    
    message('now ggplotting the range-rarity map...')
    
    taxon_df_rr <- parallel::mclapply(rr_weighted_files, mc.cores = 12,
        FUN = function (x) { ### x <- rr_weighted_files[1]
          read_csv(x, col_types = 'ddd___d___')
        }) %>% 
      bind_rows() %>%
      group_by(cell_id) %>%
      summarize(mean_risk_sum = 1/sum(sr_rr_risk) * sum(mean_risk * sr_rr_risk),
                pct_threat_sum = sum(sr_rr_threatened) / sum(sr_rr_risk)) %>%
      select(cell_id, mean_risk_sum, pct_threat_sum) %>%
      mutate(wt = 'rr_weighted')
    
    mean_rr_rast <- subs(rast_base, taxon_df_rr, by = 'cell_id', which = 'mean_risk_sum')

    mean_rr_df <- mean_rr_rast %>% 
      # aggregate(fact = 4) %>%
      rasterToPoints() %>% 
      as.data.frame() %>%
      setNames(c('long', 'lat', 'value'))

    x <- ggplot(mean_rr_df) +
      geom_raster(aes(long, lat, fill = value)) +
      geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
      ggtheme_map() +
      theme(panel.background = element_rect(fill = 'grey85')) +
      coord_sf(datum = NA) +
      scale_fill_gradientn(colors = c('green4', 'lightyellow', 'red2', 'red3', 'red4', 'purple4'),
                           limits = c(0, 1),
                           labels = c('LC', 'NT', 'VU', 'EN', 'CR', 'EX'),
                           breaks = c( 0.0,  0.2,  0.4,  0.6,  0.8,  1.0)) +
      scale_x_continuous(expand = c(0, 0)) +
      scale_y_continuous(expand = c(0, 0)) +
      labs(title = paste0('RR-weighted mean risk: ', taxon_gp),
           fill  = 'Mean risk')
  
    ggsave(filename = rr_map_plot_file,
           width = 6, height = 4, units = 'in', dpi = 300)
    
  } else {
    message('Maps exist: \n  ', map_plot_file, '\n  ', rr_map_plot_file)
  }
  
  cat(sprintf('![](%s)\n\n', path.expand(map_plot_file)))
  cat(sprintf('![](%s)\n\n', path.expand(rr_map_plot_file)))

}

2.2 Plots of ranges by taxonomic group

spp_ranges <- read_csv(file.path(dir_data, sprintf('iucn_spp_range_area_%s.csv', api_version)),
                         col_types = 'dd__') %>%
  distinct()

taxa_ranges <- read_csv(file.path(dir_git, 'data', sprintf('spp_marine_maps_%s.csv', api_version)),
                     col_types = 'ddciccc') %>%
  mutate(shp_basename = str_replace_all(basename(tolower(dbf_file)), '.dbf|_part.+', '')) %>%
  left_join(spp_ranges, by = 'iucn_sid') %>%
  left_join(shp_to_taxa, by = 'shp_basename') %>%
  mutate(log_range = log10(range_km2))

ggplot(taxa_ranges, aes(x = log_range, ..scaled..)) +
  ggtheme_plot() +
  theme(panel.grid.major = element_blank(),
        axis.text.y      = element_blank(),
        axis.title.y     = element_blank(),
        strip.text.y = element_text(angle = 0, hjust = 0)) +
  geom_density(color = 'grey20', size = .25, fill = 'grey30') +
  scale_x_continuous(limits = c(0, NA),
                     breaks = c(0:8),
                     labels = 10^(0:8)) +
  labs(x = 'Range, km^2') +
  facet_grid(taxon ~ .)

ggsave(filename = file.path(dir_git, 'ms_figures', 'fig_SI_taxa_ranges.png'), 
           width = 6, height = 4, units = 'in', dpi = 300)